
*******************************
*This code is an adapted version of the code provided by Wojciech Hardy at the Institute for Structural Research, www.ibs.org.pl/resources/. 
*******************************


//Data files, downloaded from: https://www.onetcenter.org/database.html.
* Abilities.xlsx
* Skills.xlsx
* Work Activities.xlsx
* Work Context.xlsx

global source "[Source Folder]"   

//insert the path to your crosswalks directory.
global crosswalks "[Crosswalk Folder]"

//insert the path for your output files (the do-file will create several .dta files along the way - one for each classification).
global output "[Output Folder]"


clear all	

//save the data in .dta format.
import excel using "$source/Abilities.xlsx", firstrow clear
	rename *, lower
save "$source/Abilities.dta", replace

import excel using "$source/Skills.xlsx", firstrow clear
	rename *, lower
save "$source/Skills.dta", replace

import excel using "$source/Work Activities.xlsx", firstrow clear
	rename *, lower
save "$source/Work Activities.dta", replace

import excel using "$source/Work Context.xlsx", firstrow clear
	rename *, lower
save "$source/Work Context.dta", replace



//append the prepared O*NET data, but only the needed variables
clear all
append using "$source/Abilities.dta", keep(scaleid datavalue onetsoccode elementid)
append using "$source/Skills.dta", keep(scaleid datavalue onetsoccode elementid)
append using "$source/Work Context.dta", keep(scaleid datavalue onetsoccode elementid)
append using "$source/Work Activities.dta", keep(scaleid datavalue onetsoccode elementid)

//keep only the needed measurements 
keep if scaleid=="IM" | scaleid=="CX"
drop scaleid

//simplify values and names
rename datavalue score
replace elementid=subinstr(elementid, ".", "", 5) 

//reshape so that each ONET-SOC code has one observation with all task measures */
reshape wide score, i(onetsoccode) j(elementid) string

//simplify names
renpfix score t_

//some correction for the calculation of task contents (scale reversion of selected items)
gen t_4C3b8_rev=6-t_4C3b8
gen t_4C1a2l_rev=6-t_4C1a2l
gen t_4C2a3_rev=6-t_4C2a3
foreach var in t_4A4a4 t_4A4a5 t_4A4a8 t_4A4b5 t_4A1b2 t_4A3a2 t_4A3a3 t_4A3a4 t_4A3b4 t_4A3b5 {
	gen `var'_rev=6-`var'
}

//keep only needed items
keep onetsoccode t_4A2a4 t_4A2b2 t_4A4a1 t_4A4a4 t_4A4b4 t_4A4b5 t_4C3b7 t_4C3b4 t_4C3b8_rev t_4C3d3 t_4A3a3 t_4C2d1i t_4A3a4 t_4C2d1g t_1A2a2 t_1A1f1 t_2B1a t_4C1a2l_rev t_4A4a5_rev t_4A4a8_rev t_4A1b2_rev t_4A3a2_rev t_4A3b4_rev t_4A3b5_rev

//final cleaning
sort onetsoccode
rename onetsoccode onetsoc10


*******the following lines will convert the values to other classifications, averaging them along the way, by classification codes*******
*******the code will save the data in each classification along the way, modify this as necessary if you only want to acquire one final file*******

//saving the clean, O*NET-SOC 10 data
save "$output/onetsoc10.dta", replace



//from O*NET-SOC 10 to O*NET-SOC 09
use "$output/onetsoc10.dta", clear
	joinby onetsoc10 using "$crosswalks/onetsoc09_onetsoc10.dta"
	collapse (mean) t_* , by(onetsoc09)
save "$output/onetsoc09.dta", replace

//from O*NET-SOC 10 to SOC 10
use "$output/onetsoc10.dta", clear
	replace onetsoc10 = subinstr(onetsoc10, "-", "", 1)
	destring onetsoc10, replace
	gen soc10=int(onetsoc10)
	collapse (mean) t_* , by(soc10)
save "$output/soc10.dta", replace


//from O*NET-SOC 09 na SOC 00
use "$output/onetsoc09.dta", clear
	replace onetsoc09 = subinstr(onetsoc09, "-", "", 1)
	destring onetsoc09, replace
	gen soc00=int(onetsoc09)
	collapse (mean) t_* , by(soc00)
save "$output/soc00.dta", replace

//from SOC 10 to ISCO-08
use "$output/soc10.dta", clear
	joinby soc10 using "$crosswalks/soc10_isco08.dta"
	collapse (mean) t_*, by(isco08)
save "$output/isco08.dta", replace

//from SOC 00 to ISCO-88
use "$output/soc00.dta", clear
	joinby soc00 using "$crosswalks/isco88_soc00.dta"
	collapse (mean) t_* , by(isco88)
	destring isco88, replace
	drop if isco88==.
save "$output/isco88.dta", replace



//Add Interpersonal Task Intensity (ITI), data due to Orhun Sevinc 
import excel using "$source/ITI_SOC.xlsx", firstrow clear
gen soc10_X = substr(soc10,1,5)
gen soc10_XX = substr(soc10,1,4)
gen soc10_XXX = substr(soc10,1,3)
gen soc10_XXXX = substr(soc10,1,2)
replace soc10 = subinstr(soc10, "X", "",.) 
replace soc10 = subinstr(soc10, "Y", "",.) 
replace soc10_X = subinstr(soc10_X, "X", "",.) 
replace soc10_X = subinstr(soc10_X, "Y", "",.) 
replace soc10_XX = subinstr(soc10_XX, "X", "",.) 
replace soc10_XXX = subinstr(soc10_XXX, "X", "",.) 
destring, replace
save "$source/ITI_SOC_ManX.dta", replace

use "$source/ITI_SOC_ManX.dta"
replace soc10 =. if soc10<100000
replace soc10_X =. if soc10_X<10000
replace soc10_XX =. if soc10_XX<1000
replace soc10_XXX =. if soc10_XXX<100
replace soc10_XXXX =. if soc10_XXXX<10
save "$source/ITI_SOC_ManX.dta", replace

use "$source/ITI_SOC_ManX.dta"
drop if missing(soc10)
save "$source/ITI_SOC_Man0.dta"

use "$source/ITI_SOC_ManX.dta"
drop if !missing(soc10)
drop soc10
drop if missing(soc10_X)
save "$source/ITI_SOC_Man1.dta"

use "$source/ITI_SOC_ManX.dta", clear
drop if !missing(soc10_X)
drop soc10 soc10_X
drop if missing(soc10_XX)
duplicates drop soc10_XX, force
save "$source/ITI_SOC_Man2.dta", replace

use "$source/ITI_SOC_ManX.dta"
drop if !missing(soc10_XX)
drop soc10 soc10_X soc10_XX
drop if missing(soc10_XXX)
duplicates drop soc10_XXX, force
save "$source/ITI_SOC_Man3.dta", replace

use "$source/ITI_SOC_ManX.dta"
drop if !missing(soc10_XXX)
drop soc10 soc10_X soc10_XX soc10_XXX
drop if missing(soc10_XXXX)
save "$source/ITI_SOC_Man4.dta"
//Manually dropped duplicates for 194 and 475

//Need variables soc10_X, etc. also in data to merge with:
use  "$output/soc10.dta", clear
tostring soc10, replace
gen soc10_X = substr(soc10,1,5)
gen soc10_XX = substr(soc10,1,4)
gen soc10_XXX = substr(soc10,1,3)
gen soc10_XXXX = substr(soc10,1,2)
destring, replace

save "$output/soc10_X.dta", replace

//Now, merge step by step, matching the maximum number of digits
use "$output/soc10_X.dta", clear
merge 1:1 soc10 using "$source/ITI_SOC_Man0.dta"
drop if _merge==2
rename _merge Man0_merge

replace soc10_X=999999 if  Man0_merge==3
merge m:1 soc10_X using "$source/ITI_SOC_Man1.dta", update
rename _merge Man1_merge
replace soc10_XX=999999 if  Man1_merge==4 | Man0_merge==3

merge m:1 soc10_XX using "$source/ITI_SOC_Man2.dta", update
rename _merge Man2_merge
replace soc10_XXX=999999 if  Man1_merge==4 | Man0_merge==3 | Man2_merge==4

merge m:1 soc10_XXX using "$source/ITI_SOC_Man3.dta", update
rename _merge Man3_merge
replace soc10_XXXX=999999 if  Man1_merge==4 | Man0_merge==3 | Man2_merge==4 | Man3_merge==4

merge m:1 soc10_XXXX using "$source/ITI_SOC_Man4.dta", update
rename _merge Man4_merge

save "$output/soc10_ITImerge.dta", replace

//from SOC 10 to ISCO-08
use "$output/soc10_ITImerge.dta", clear
	joinby soc10 using "$crosswalks/soc10_isco08.dta"
	collapse (mean) t_* iti, by(isco08)
	save "$output/soc10_ITImerge_isco08.dta", replace

//Swedish part, using cross-walk data from SCB
//ssyk12 till isco08
import excel using "$source/ssyk2012_isco-08.xlsx", firstrow sheet(Stata) clear
	rename *, lower
	duplicates drop
	sort ssyk_2012 isco08
	duplicates drop ssyk_2012, force
save "$source/ssyk12_to_isco08.dta", replace

use "$output/soc10_ITImerge_isco08.dta"
joinby isco08 using "$source/ssyk12_to_isco08.dta"
save "$output/ssyk12_isco08.dta", replace

//ssyk96 till ssyk12
import excel using "$source/ssyk96_ssyk2012.xlsx", firstrow sheet(Stata) clear
	rename *, lower
	rename ssyk2012 ssyk_2012
	rename ssyk96 ssyk_96
	duplicates drop
	sort ssyk_96 ssyk_2012 
	duplicates drop ssyk_96, force
save "$source/ssyk96_to_ssyk12.dta", replace

use "$output/ssyk12_isco08.dta", clear
joinby ssyk_2012 using "$source/ssyk96_to_ssyk12.dta"
save "$output/ssyk96_12_isco08.dta", replace
